EWAS for MDD PRS


Methods

Subjects

From Generation Scotland,

  • Wave 1: N = 4977

  • Wave 2: N = 4312

DNA methylation data and technical covariates

Processed by Rosie, using EPIC array. M values.

Relatedness controlled for by regressing against GRM. Other regressors include batch and cell proportion.

Additional covariates

Ever smoke, pack years, age, sex and 20 methylation PCs.

Pipeline

From Mark, wiki.


EWAS Results

PRS pT=5e-8

PRS pT=5e-8 no MHC region

Number of significant CpGs associated with PRS

CpGs to nearest genome-wide significant locus

95% CI for distance to the nearest MDD risk locus:

  • Significant CpG: 36-3.14495510^{5}
  • Non-significant CpG: 1.16545510{5}-6.039677910{7}

Gene annotation

Pathway analysis

GO.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.GO.txt'),header=T,stringsAsFactors=F)
KEGG.result = read.table(here::here('MR_meth_MDD/result/EWAS_MDDprs_fromKathryn/meta-pgrs-mdd/mdd_SNPCH_prs_5e_08/mdd_SNPCH_prs_5e_08.KEGG.txt'),header=T,stringsAsFactors=F)

GO.result.sig=GO.result[order(GO.result$FDR,decreasing=F),] %>%
  filter(.,FDR<0.05)
KEGG.result.sig=KEGG.result[order(KEGG.result$FDR,decreasing=F),] %>%
  filter(.,FDR<0.05)

# pathway analysis for non-MHC probes
pathway.dat=fig.dat.GS.pT.5e_08 %>%
      mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No')) %>%
      filter(.,is.MHC == 'No')
GO.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="GO",
              array.type = "EPIC") %>%
  .[order(.$P.DE,decreasing=F),] 

KEGG.result.sig_noMHC <- 
  gometh(sig.cpg=pathway.dat$CpG[pathway.dat$p.adj<0.05], 
         all.cpg=pathway.dat$CpG, collection="KEGG",
              array.type = "EPIC") %>%
  .[order(.$FDR,decreasing=F),] 

GO results - all CpG

ID ONTOLOGY TERM N DE P.DE FDR
GO:0002476 BP antigen processing and presentation of endogenous peptide antigen via MHC class Ib 8 8 0 0
GO:0002484 BP antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway 7 7 0 0
GO:0002486 BP antigen processing and presentation of endogenous peptide antigen via MHC class I via ER pathway, TAP-independent 7 7 0 0
GO:0042611 CC MHC protein complex 23 17 0 0
GO:0071556 CC integral component of lumenal side of endoplasmic reticulum membrane 26 15 0 0
GO:0098553 CC lumenal side of endoplasmic reticulum membrane 26 15 0 0
GO:0098576 CC lumenal side of membrane 31 15 0 0
GO:0042605 MF peptide antigen binding 23 13 0 0
GO:0002478 BP antigen processing and presentation of exogenous peptide antigen 164 22 0 0
GO:0019884 BP antigen processing and presentation of exogenous antigen 171 22 0 0
GO:0048002 BP antigen processing and presentation of peptide antigen 177 22 0 0
GO:0042613 CC MHC class II protein complex 14 10 0 0
GO:0060333 BP interferon-gamma-mediated signaling pathway 86 17 0 0
GO:0019882 BP antigen processing and presentation 212 22 0 0
GO:0012507 CC ER to Golgi transport vesicle membrane 58 14 0 0
GO:0003823 MF antigen binding 52 13 0 0
GO:0002250 BP adaptive immune response 393 26 0 0
GO:0030176 CC integral component of endoplasmic reticulum membrane 145 18 0 0
GO:0002428 BP antigen processing and presentation of peptide antigen via MHC class Ib 9 8 0 0
GO:0031227 CC intrinsic component of endoplasmic reticulum membrane 153 18 0 0

KEGG results - all CpG

ID Description N DE P.DE FDR
path:hsa04612 Antigen processing and presentation 66 20.0 0e+00 0.0e+00
path:hsa05330 Allograft rejection 34 15.0 0e+00 0.0e+00
path:hsa04940 Type I diabetes mellitus 41 16.0 0e+00 0.0e+00
path:hsa05332 Graft-versus-host disease 37 15.0 0e+00 0.0e+00
path:hsa05320 Autoimmune thyroid disease 46 15.0 0e+00 0.0e+00
path:hsa05416 Viral myocarditis 55 15.0 0e+00 0.0e+00
path:hsa04145 Phagosome 140 18.0 0e+00 0.0e+00
path:hsa05310 Asthma 27 9.0 0e+00 0.0e+00
path:hsa05322 Systemic lupus erythematosus 112 13.5 0e+00 0.0e+00
path:hsa04514 Cell adhesion molecules 135 16.0 0e+00 0.0e+00
path:hsa05150 Staphylococcus aureus infection 82 11.0 0e+00 0.0e+00
path:hsa05169 Epstein-Barr virus infection 193 17.0 0e+00 0.0e+00
path:hsa05166 Human T-cell leukemia virus 1 infection 211 18.5 0e+00 0.0e+00
path:hsa04672 Intestinal immune network for IgA production 43 9.0 0e+00 0.0e+00
path:hsa05168 Herpes simplex virus 1 infection 465 20.0 0e+00 1.0e-07
path:hsa05145 Toxoplasmosis 106 12.0 0e+00 2.0e-07
path:hsa05321 Inflammatory bowel disease 61 9.0 0e+00 4.0e-07
path:hsa05140 Leishmaniasis 68 9.0 0e+00 6.0e-07
path:hsa05323 Rheumatoid arthritis 87 9.0 2e-07 3.0e-06
path:hsa04640 Hematopoietic cell lineage 90 9.0 3e-07 4.6e-06

GO results - no MHC

ONTOLOGY TERM N DE P.DE FDR
GO:0099061 CC integral component of postsynaptic density membrane 50 2 0.0012904 1
GO:0099146 CC intrinsic component of postsynaptic density membrane 53 2 0.0013446 1
GO:0099060 CC integral component of postsynaptic specialization membrane 72 2 0.0021919 1
GO:0098948 CC intrinsic component of postsynaptic specialization membrane 75 2 0.0022612 1
GO:0072686 CC mitotic spindle 101 2 0.0023113 1
GO:0021691 BP cerebellar Purkinje cell layer maturation 2 1 0.0024461 1
GO:0021590 BP cerebellum maturation 3 1 0.0026990 1
GO:0021699 BP cerebellar cortex maturation 3 1 0.0026990 1
GO:1902412 BP regulation of mitotic cytokinesis 6 1 0.0027738 1
GO:0098839 CC postsynaptic density membrane 72 2 0.0028400 1
GO:0099634 CC postsynaptic specialization membrane 97 2 0.0043224 1
GO:0001093 MF TFIIB-class transcription factor binding 3 1 0.0045469 1
GO:0045666 BP positive regulation of neuron differentiation 353 3 0.0045874 1
GO:0021578 BP hindbrain maturation 6 1 0.0046396 1
GO:0070369 CC beta-catenin-TCF7L2 complex 3 1 0.0049050 1
GO:0021626 BP central nervous system maturation 7 1 0.0049102 1
GO:0043515 MF kinetochore binding 6 1 0.0053904 1
GO:0099055 CC integral component of postsynaptic membrane 113 2 0.0054125 1
GO:0098936 CC intrinsic component of postsynaptic membrane 118 2 0.0057831 1
GO:0051315 BP attachment of mitotic spindle microtubules to kinetochore 13 1 0.0058028 1

KEGG results - no MHC

Description N DE P.DE FDR
path:hsa00010 Glycolysis / Gluconeogenesis 64 0 1 1
path:hsa00020 Citrate cycle (TCA cycle) 28 0 1 1
path:hsa00030 Pentose phosphate pathway 25 0 1 1
path:hsa00040 Pentose and glucuronate interconversions 32 0 1 1
path:hsa00051 Fructose and mannose metabolism 32 0 1 1
path:hsa00052 Galactose metabolism 29 0 1 1
path:hsa00053 Ascorbate and aldarate metabolism 27 0 1 1
path:hsa00061 Fatty acid biosynthesis 15 0 1 1
path:hsa00062 Fatty acid elongation 26 0 1 1
path:hsa00071 Fatty acid degradation 42 0 1 1
path:hsa00072 Synthesis and degradation of ketone bodies 9 0 1 1
path:hsa00100 Steroid biosynthesis 18 0 1 1
path:hsa00120 Primary bile acid biosynthesis 17 0 1 1
path:hsa00130 Ubiquinone and other terpenoid-quinone biosynthesis 11 0 1 1
path:hsa00140 Steroid hormone biosynthesis 56 0 1 1
path:hsa00190 Oxidative phosphorylation 115 0 1 1
path:hsa00220 Arginine biosynthesis 20 0 1 1
path:hsa00230 Purine metabolism 125 0 1 1
path:hsa00232 Caffeine metabolism 5 0 1 1
path:hsa00240 Pyrimidine metabolism 55 0 1 1

Summary of EWAS for MDD PRS pT = 5e-8

# 1. No. of significant CpGs in the MHC region
ewas.pTwgsig = ewas.pTwgsig %>%
  mutate(is.MHC = if_else(MAPINFO>25000000&MAPINFO<35000000&CHR==6,'Yes','No'))
no.MHC.sig = table(ewas.pTwgsig$is.MHC[ewas.pTwgsig$p.adj<0.05])
no.MHC.sig
## 
##  No Yes 
##  37 688
# 2. Top 10 CpGs
top.10.sig = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  .[order(.$P.value,decreasing = F),] 
top.10.sig[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
84 cg03270340 NA NA 55.5951 2.4688 0 ++ 97.6 41.064 1 0 6 28891204 0 3551 yes Yes
29 cg00903577 NA NA 63.8098 2.8351 0 ++ 99.1 106.379 1 0 6 28831109 0 3099 yes Yes
354 cg12914966 NA NA 79.4726 3.6703 0 ++ 99.4 171.532 1 0 6 28830789 0 2779 yes Yes
470 cg16677399 NA NA 51.7185 2.4974 0 ++ 99.5 212.566 1 0 6 28830902 0 2892 yes Yes
389 cg14345882 NA NA -51.2371 2.5431 0 – 99.3 148.961 1 0 6 26364793 0 137 yes Yes
182 cg06608359 NA NA 59.9925 3.0263 0 ++ 97.1 34.908 1 0 6 28831300 0 3290 yes Yes
678 cg25643361 NA NA -44.1782 2.3263 0 – 98.1 53.559 1 0 6 26361389 0 41 yes Yes
721 cg27543291 NA NA -55.5490 2.9654 0 – 99.3 148.203 1 0 6 26367644 0 10 yes Yes
282 cg10046620 NA NA -41.8334 2.2423 0 – 98.5 66.796 1 0 6 27775042 0 14 yes Yes
233 cg08168669 NA NA -38.2512 2.0864 0 – 99.4 172.357 1 0 6 26367580 0 74 yes Yes
top10.max.p=max(top.10.sig$p.adj[1:10])

# 3. significant CpGs outside of the MHC region
top.10.sig.noMHC = ewas.pTwgsig %>%
  filter(.,p.adj<0.05) %>%
  filter(.,is.MHC=='No') %>%
  .[order(.$P.value,decreasing = F),] 
summary(top.10.sig.noMHC$p.adj)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 6.480e-06 1.515e-03 9.472e-03 7.844e-03 4.836e-02
# 4. Top 10 non-MHC CpGs
top.10.sig.noMHC[1:10,] %>%
    kbl() %>%
  kable_material(c("striped", "hover"))
CpG Allele1 Allele2 Effect StdErr P.value Direction HetISq HetChiSq HetDf HetPVal CHR MAPINFO p.adj distance_to_nearest_gwashit EWAS.sig is.MHC
7 cg05590274 NA NA 16.9936 1.9302 0 ++ 94.4 17.997 1 0.0000221 11 113262625 0.0e+00 15822 yes No
26 cg17841099 NA NA -12.1025 1.5366 0 – 97.9 48.399 1 0.0000000 1 153940090 0.0e+00 21962506 yes No
35 cg26200795 NA NA 18.7156 2.4494 0 ++ 91.3 11.449 1 0.0007152 18 52895482 0.0e+00 5322 yes No
8 cg06276712 NA NA -12.4238 1.6339 0 – 96.6 29.631 1 0.0000001 7 12107011 0.0e+00 140319 yes No
27 cg17862947 NA NA 8.2523 1.1073 0 ++ 99.6 285.470 1 0.0000000 12 133086926 1.0e-07 11712199 yes No
6 cg05328885 NA NA 30.1986 4.1802 0 ++ 95.9 24.148 1 0.0000009 11 30943623 4.0e-07 1541 yes No
5 cg04317648 NA NA 11.9459 1.6968 0 ++ 86.6 7.456 1 0.0063220 1 8485376 1.5e-06 553 yes No
4 cg02403154 NA NA -14.5513 2.0861 0 – 0.0 0.386 1 0.5345000 18 52989223 2.3e-06 17013 yes No
22 cg14159747 NA NA -17.4837 2.5178 0 – 93.7 15.880 1 0.0000675 11 113255604 2.9e-06 22843 yes No
15 cg10154826 NA NA 36.6969 5.3737 0 ++ 96.1 25.769 1 0.0000004 6 17600994 6.5e-06 572321 yes No
top.10.sig.noMHC.sCHR=top.10.sig.noMHC[1:10,] %>%
  .[order(.$CHR),]
top10.noMHC.max.p=max(top.10.sig.noMHC$p.adj[1:10])


EWAS replication in LBC & ALSPAC

Methods

Subjects

  • From LBC: N = 1330
  • From ALSPAC: N = 791

Data

LBC:

  • DNA methylation: Processed by Riccardo. 450k array. M-values.

  • Genetic: HRCv1.1 imputed data

  • LD reference: 1000G CEU sample, phase 3, hg19 genome build

ALSPAC:

  • DNA methylation: Processed by Riccardo. 450k array. M-values.

  • Genetic: HRCv1.1 imputed data

  • LD reference: 1000G CEU sample, phase 3, hg19 genome build

Covariates

Smoking status, age, sex, 20 methylation PCs, and cell proportions.

Pipeline

LBC:

From Mark, wiki.

Local copy (adapted to LBC data): /exports/igmm/eddie/GenScotDepression/shen/Tools/ewas_pipeline/

ALSPAC:

Run by Doretta

Results

Manhattan Plot

Correlation of beta for significant CpGs found in GS

## `geom_smooth()` using formula 'y ~ x'

Summry of replication analysis: on CpGs significant in GS (nCpG = 620)

  • Correlation between betas in GS and LBC for all CpGs: r = 0.929

  • Correlation between betas in GS and LBC for CpGs not in MHC region: r = 0.738

  • Betas remained in the same direction in LBC: 0%

  • CpGs nominally significant in LBC: 0%

Number of significant CpGs associated with PRS


SNP to CpG mapping


## png 
##   2


Mendelian Randomisation: CpG -> Brain cortical structure (discovery)


Methods

Datasets

  • Exposure: mQTL data from GoDMC (N = 20,396). Cohorts that overlap with PGC29 were removed.

  • Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)

Instruments

  • In the EWAS of MDD PRS (pT = 5e-8), a total of 725 CpGs were significant after Bonferroni correction.

  • 615 had more than 30 genetic instruments available.

  • 18 CpGs left after ‘clumping’ (window = 3Mb, r=0.1) in the MHC region.

  • Finally, 20 CpGs with >=5 genetic instruments after data harmonisation entered MR.

MR methods

Results

25 CpGs tested:

Network plot: DNAm -> brain measures

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    filter(.,method==targetmethod) %>%
     mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='fdr')) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval))

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

ivw.res$outcome = recode(ivw.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

egger.res$outcome = recode(egger.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

wm.res$outcome = recode(wm.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')


# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res,target.col='pval')
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total

Table: DNAm -> brain measures

# Restore pcorr for MR Egger

mr.res.p = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                      ivw.padj=ivw.res$p.adj,ivw.p=ivw.res$pval,
                      mregger.padj=egger.res$p.adj,mregger.p=egger.res$pval,
                      wm.padj=wm.res$p.adj,wm.p=wm.res$pval) %>%
  mutate(sig_over2 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=2,1,0))
mr.res.beta = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                         ivw.beta=ivw.res$b,
                      mregger.beta=egger.res$b,wm.beta=wm.res$b) %>%
  mutate(is.sign.consis = if_else(ivw.beta/mregger.beta>0&ivw.beta/wm.beta>0,1,0))

mr.summary=left_join(mr.res.p,mr.res.beta,by=c('exposure','outcome')) %>%
  mutate(is.valid = if_else(.$is.sign.consis==1&.$sig_over2==1,1,0)) %>%
  .[order(.$is.valid,decreasing = T),]

mr.sig = mr.summary %>%
  filter(.,is.valid==1)

QC.table = ivw.res %>%
  select(outcome,exposure,`Egger intercept`=egger_intercept,`Egger pval`=pval.1,
         Q,`Q pval`=Q_pval,Nsnp=nsnp)

mr.sig.output = mr.sig %>%
  select(-is.valid,-sig_over2,-is.sign.consis) %>%
  left_join(.,QC.table,by=c('exposure','outcome')) %>%
  .[order(.$exposure,.$outcome),] %>%
  select(ends_with('beta'),ends_with('.p'),ends_with('.padj'),
         everything()) %>%
  select(exposure,outcome,Nsnp,
         starts_with('ivw'),starts_with('wm'),starts_with('mregger'),
         everything()) %>%
  mutate_if(is.numeric, format, digits=3,nsmall = 0)

rownames(mr.sig.output)=NULL

# Make table
mr.sig.output %>%
  kbl(booktabs = TRUE) %>%
  kable_material(c("striped", "hover")) %>%
  add_header_above(c(" " = 3, "IVW" = 3, "WM" = 3, "MR-Egger" = 3, 
                     "QC Egger"=2,"QC Q (heterogeneity)"=2))
IVW
WM
MR-Egger
QC Egger
QC Q (heterogeneity)
exposure outcome Nsnp ivw.beta ivw.p ivw.padj wm.beta wm.p wm.padj mregger.beta mregger.p mregger.padj Egger intercept Egger pval Q Q pval
cg03270340 gFA 9 -2.96e-03 4.93e-03 0.029592 -3.82e-03 3.96e-13 2.38e-12 -4.62e-03 0.0525 0.473 1.14e-03 0.356 49.57 4.95e-08
cg03270340 gMD 9 4.80e-06 2.44e-03 0.014668 6.16e-06 2.84e-14 1.02e-13 7.85e-06 0.0292 0.397 -2.09e-06 0.250 44.76 4.08e-07
cg03270340 TotalVol 9 2.67e-02 1.52e-05 0.000137 2.46e-02 8.43e-04 2.53e-03 1.63e-02 0.1995 0.738 7.82e-03 0.322 8.50 3.86e-01
cg07519229 TotalSurface 9 -3.64e-02 2.38e-05 0.000428 -3.58e-02 5.48e-04 4.93e-03 -4.57e-02 0.0639 0.480 3.99e-03 0.634 10.95 2.04e-01
cg08116408 gFA 18 5.22e-03 7.80e-05 0.000702 5.17e-03 5.20e-14 4.68e-13 4.69e-03 0.1159 0.695 2.73e-04 0.833 110.44 9.99e-16
cg08116408 gFA_TR 18 2.21e-03 3.19e-04 0.002873 2.21e-03 7.17e-11 6.45e-10 1.86e-03 0.1760 0.569 1.85e-04 0.760 95.67 5.59e-13
cg08116408 gMD 18 -8.75e-06 1.12e-04 0.001010 -9.00e-06 1.23e-16 5.53e-16 -7.81e-06 0.1259 0.566 -4.85e-07 0.827 129.62 2.22e-19
cg08116408 gMD_TR 18 -3.68e-06 4.45e-04 0.004004 -3.52e-06 4.21e-10 1.89e-09 -2.45e-06 0.2857 0.696 -6.40e-07 0.533 85.98 3.26e-11
cg08116408 TotalVol 18 -4.53e-02 2.54e-05 0.000152 -4.39e-02 1.74e-06 1.04e-05 -3.25e-02 0.1714 0.738 -6.78e-03 0.527 38.97 1.80e-03
cg08344181 gMD 40 2.42e-06 6.75e-03 0.030362 5.54e-06 6.03e-19 5.43e-18 2.40e-06 0.2453 0.649 1.65e-08 0.990 245.97 7.55e-32
cg08344181 gMD_TR 40 1.23e-06 2.39e-03 0.014333 2.42e-06 2.80e-12 1.68e-11 1.29e-06 0.1696 0.610 -4.22e-08 0.943 156.10 6.08e-16
cg08344181 TotalVol 40 1.46e-02 1.04e-02 0.037536 2.66e-02 1.57e-06 1.04e-05 7.26e-03 0.5968 0.842 5.49e-03 0.558 126.39 3.72e-11
cg14345882 TotalVol 18 -3.12e-02 1.10e-04 0.000494 -2.87e-02 2.83e-04 1.02e-03 -1.92e-02 0.0964 0.738 -7.45e-03 0.135 29.03 3.43e-02
cg17862947 gMD 39 2.15e-06 8.52e-03 0.030657 5.02e-06 2.65e-18 1.59e-17 2.41e-06 0.1832 0.649 -2.10e-07 0.867 247.28 1.67e-32
cg17862947 gMD_TR 39 1.06e-06 4.52e-03 0.020355 2.18e-06 1.44e-12 1.30e-11 1.50e-06 0.0732 0.610 -3.43e-07 0.549 160.69 4.99e-17
cg17862947 TotalVol 39 1.27e-02 1.34e-02 0.040255 2.37e-02 2.34e-06 1.05e-05 9.96e-03 0.4101 0.738 2.29e-03 0.800 124.97 3.34e-11
cg17925084 MeanThk 6 -4.67e-02 7.49e-04 0.004492 -5.66e-02 1.42e-04 2.55e-03 -3.69e-02 0.4303 0.859 -3.56e-03 0.815 6.75 2.40e-01
ls.discovery = mr.sig.output[,c('exposure','outcome')]

Mendelian Randomisation: CpG -> Brain cortical structure (replication)


Methods

Datasets

  • Exposure: mQTL data from GS (N ~= 10,000).

  • Outcome: Cortical thickness and cortical surface area (UKB, N~=40k)

Instruments

  • CpGs showed causal effect to any brain structural measure in the discovery analysis.

MR methods

Results

25 CpGs tested:

Network plot: DNAm -> brain measures

reorg_fornemat <- function(tmp.res,targetmethod,tmp.trait,pval_col){
  new.res = tmp.res[[tmp.trait]] %>%
    filter(.,method==targetmethod) %>%
     mutate(.,outcome=tmp.trait,p.adj=p.adjust(get(pval_col),method='fdr')) 
}

ivw.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Inverse variance weighted',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

egger.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='MR Egger',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval))

wm.res = as.list(names(res)) %>%
  lapply(.,reorg_fornemat,tmp.res=res,
         targetmethod='Weighted median',pval_col='pval') %>%
  bind_rows %>%
  mutate(log.p.val=-log10(pval)) 

ivw.res$outcome = recode(ivw.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

egger.res$outcome = recode(egger.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')

wm.res$outcome = recode(wm.res$outcome,
                         GMsa = 'Surface area',
                         GMthk = 'Cortical thickness',
                         GMvol = 'Cortical volume',
                         WMgMD = 'global MD',
                         WMgFA_TR = 'gFA in\nthalamic radiation',
                         WMgMD_TR = 'gFA in\nthalamic radiation',
                         WMFA_ATR = 'FA in anterior\nthalamic radiation',
                         WMMD_ATR = 'MD in anterior\nthalamic radiation')


# Prep data for graph 

fig.1=circular_graph_mr(target.res = ivw.res)
fig.2=circular_graph_mr(target.res = egger.res,target.col='pval')
fig.3=circular_graph_mr(target.res = wm.res)

fig.total = ggarrange(fig.1,fig.2,fig.3,ncol=3,
                      labels = c('IVW','MR Egger','Weighted-median'),common.legend = T)

ggsave(fig.total,device = 'png',height=15,width = 40,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_DNAm_to_brain/MR_res.png'))
fig.total

Table: DNAm -> brain measures

# Restore pcorr for MR Egger

mr.res.p = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                      ivw.padj=ivw.res$p.adj,ivw.p=ivw.res$pval,
                      mregger.padj=egger.res$p.adj,mregger.p=egger.res$pval,
                      wm.padj=wm.res$p.adj,wm.p=wm.res$pval) %>%
  mutate(sig_over2 = if_else(rowSums(select(.,ends_with('.padj'))<0.05)>=2,1,0))
mr.res.beta = data.frame(exposure=ivw.res$exposure,outcome=ivw.res$outcome,
                         ivw.beta=ivw.res$b,
                      mregger.beta=egger.res$b,wm.beta=wm.res$b) %>%
  mutate(is.sign.consis = if_else(ivw.beta/mregger.beta>0&ivw.beta/wm.beta>0,1,0))


mr.summary=left_join(mr.res.p,mr.res.beta,by=c('exposure','outcome')) %>%
  mutate(is.valid = if_else(.$is.sign.consis==1&.$sig_over2==1,1,0)) %>%
  .[order(.$is.valid,decreasing = T),]

mr.sig = mr.summary %>%
  filter(.,is.valid==1)

QC.table = ivw.res %>%
  select(outcome,exposure,`Egger intercept`=egger_intercept,`Egger pval`=pval.1,
         Q,`Q pval`=Q_pval,Nsnp=nsnp)

mr.sig.output = mr.sig %>%
  select(-is.valid,-sig_over2,-is.sign.consis) %>%
  left_join(.,QC.table,by=c('exposure','outcome')) %>%
  left_join(ls.discovery,.,by=c('exposure','outcome')) %>%
  .[order(.$exposure,.$outcome),] %>%
  select(ends_with('beta'),ends_with('.p'),ends_with('.padj'),
         everything()) %>%
  select(exposure,outcome,Nsnp,
         starts_with('ivw'),starts_with('wm'),starts_with('mregger'),
         everything()) %>%
  mutate_if(is.numeric, format, digits=3,nsmall = 0)

colnames(mr.sig.output) = colnames(mr.sig.output) %>%
  gsub('ivw.','',.) %>%
  gsub('wm.','',.) %>%
  gsub('mregger.','',.)

rownames(mr.sig.output)=NULL

# Make table
mr.sig.output %>%
  kbl(booktabs = TRUE) %>%
  kable_material(c("striped", "hover")) %>%
  add_header_above(c(" " = 3, "IVW" = 3, "WM" = 3, "MR-Egger" = 3, 
                     "QC Egger"=2,"QC Q (heterogeneity)"=2))
IVW
WM
MR-Egger
QC Egger
QC Q (heterogeneity)
exposure outcome Nsnp beta p padj beta p padj beta p padj Egger intercept Egger pval Q Q pval
cg03270340 gFA 38 -9.51e-03 3.72e-08 5.21e-08 -1.39e-02 7.02e-26 2.46e-25 -1.00e-02 0.010041 0.01406 9.79e-05 0.8756 127.0 8.61e-12
cg03270340 gMD 38 1.62e-05 4.23e-08 5.92e-08 2.32e-05 6.28e-29 1.61e-28 1.55e-05 0.018459 0.02584 1.20e-07 0.9106 148.0 3.21e-15
cg03270340 TotalVol 38 9.41e-02 9.83e-10 1.38e-09 1.07e-01 7.34e-09 1.71e-08 3.18e-02 0.339392 0.47515 1.25e-02 0.0404 50.3 7.05e-02
cg07519229 TotalSurface NA NA NA NA NA NA NA NA NA NA NA NA NA NA
cg08116408 gFA 41 2.28e-02 6.20e-12 1.11e-11 3.05e-02 1.55e-25 3.62e-25 2.40e-02 0.008431 0.01406 -1.08e-04 0.8879 122.7 2.50e-10
cg08116408 gFA_TR 41 9.59e-03 1.45e-09 2.53e-09 1.29e-02 3.89e-18 6.80e-18 9.54e-03 0.025992 0.03639 4.69e-06 0.9898 111.8 1.04e-08
cg08116408 gMD 41 -4.02e-05 3.65e-12 8.52e-12 -5.28e-05 1.08e-32 7.59e-32 -4.18e-05 0.008334 0.01459 1.52e-07 0.9088 148.3 2.36e-14
cg08116408 gMD_TR 41 -1.81e-05 9.14e-11 2.13e-10 -2.16e-05 2.21e-18 1.54e-17 -1.51e-05 0.043792 0.07664 -2.86e-07 0.6547 106.6 5.75e-08
cg08116408 TotalVol 41 -1.98e-01 1.30e-10 2.27e-10 -2.26e-01 5.84e-09 1.71e-08 -1.80e-01 0.047935 0.08389 -1.77e-03 0.8289 52.9 8.34e-02
cg08344181 gMD 29 8.79e-05 3.53e-17 1.23e-16 1.09e-04 6.90e-29 1.61e-28 1.06e-04 0.000894 0.00243 -9.07e-07 0.5002 68.7 2.83e-05
cg08344181 gMD_TR 29 4.12e-05 1.44e-19 1.01e-18 4.20e-05 2.19e-15 7.66e-15 4.68e-05 0.000810 0.00283 -2.87e-07 0.6262 40.4 6.04e-02
cg08344181 TotalVol 29 4.84e-01 1.35e-15 9.45e-15 5.51e-01 1.04e-11 7.25e-11 3.79e-01 0.036361 0.08389 5.42e-03 0.5166 29.5 3.87e-01
cg14345882 TotalVol 40 -1.13e-01 2.61e-13 9.15e-13 -1.15e-01 1.11e-08 1.94e-08 -1.11e-01 0.002006 0.01404 -4.19e-04 0.9377 46.6 1.90e-01
cg17862947 gMD 24 1.05e-04 2.25e-21 1.57e-20 1.15e-04 4.36e-27 7.62e-27 1.11e-04 0.001040 0.00243 -2.73e-07 0.8317 48.9 1.28e-03
cg17862947 gMD_TR 24 4.51e-05 1.91e-13 6.67e-13 4.40e-05 5.03e-13 7.05e-13 4.21e-05 0.016905 0.03945 1.42e-07 0.8417 46.4 2.65e-03
cg17862947 TotalVol 24 4.78e-01 5.79e-11 1.35e-10 4.75e-01 7.17e-08 1.00e-07 4.59e-01 0.032865 0.08389 8.92e-04 0.9215 28.4 2.01e-01
cg17925084 MeanThk NA NA NA NA NA NA NA NA NA NA NA NA NA NA

p plot for MR: discovery, replication and bidirectional MR

p.plot.mr = function(tmp.res,tmp.facetcol='exposure',tmp.ycol='outcome',tmp.title){
dat.fig = base::Reduce(function(x,y) rbind(x,y),tmp.res) %>%
  right_join(.,mr.sig.output[,c('exposure','outcome')],var=c('exposure','outcome')) %>%
  dplyr::rename(Beta=b,SE=se,Method=method) %>%
  .[order(.$pval,decreasing = T),] %>%
  mutate(ord=1:nrow(.)) 

fig.p =
  ggplot(dat.fig, aes(x=reorder(get(tmp.ycol),ord), y=-log10(pval), color=Method)) +
  geom_point(position=position_dodge(width = 0.1), stat="identity", size=2) +
  facet_grid(rows = vars(get(tmp.facetcol)))+
  theme(
    axis.line.x = element_line(size=0.5),
    axis.text=element_text(size=10), axis.title=element_text(size=11),
    plot.title = element_text(lineheight=1, face="bold", vjust=1, hjust=0.5,size=9),
    strip.text = element_text(size=8),
    plot.margin=unit(c(1,1,1,3),'mm')) +
  scale_x_discrete(position='top')+
  geom_hline(yintercept=0,color = "black", size=0.3)+
  geom_hline(yintercept=-log10(0.05),color = "red",linetype='dashed', size=0.3)+
  coord_flip()+
  ylab('P-value for MR')+
  xlab('CpG') +
  ylim(0,40)+
  ggtitle(tmp.title)

 return(fig.p)
}

# Discovery analysis
res = as.list(paste0('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GoDMC_MR_DNAm_to_Brain/Summary_DNAm_to_',ls.measure,'.csv')) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F)
names(res)=ls.measure
fig.p.discovery.mr = p.plot.mr(res,tmp.title='DNAm (GoDMC) to Brain')
## Joining, by = c("outcome", "exposure")
# Replication analysis
res = as.list(paste0('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_DNAm_to_Brain/Summary_DNAm_to_',ls.measure,'.csv')) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F)
names(res)=ls.measure
fig.p.replication.mr = p.plot.mr(res,tmp.title='DNAm (GS) to Brain')
## Joining, by = c("outcome", "exposure")
# Reversed direction MR
res = as.list(list.files('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_Brain_to_DNAm/',pattern='^Summary',full.names = T)) %>% 
  lapply(.,FUN=read.csv,header=T,sep='\t',stringsAsFactors = F) %>%
  lapply(.,dplyr::rename,outcome=exposure,exposure=outcome)
names(res)=list.files('/gpfs/igmmfs01/eddie/GenScotDepression/shen/ActiveProject/Genetic/MR_meth_MDD/result/GS_MR_Brain_to_DNAm/',pattern='^Summary') %>%
  gsub('Summary_Brain_to_','',.) %>%
  gsub('.csv','',.)
fig.p.reverse.mr = p.plot.mr(res,tmp.title='Brain to DNAm (GS)')
## Joining, by = c("exposure", "outcome")
fig.total = ggarrange(fig.p.discovery.mr,fig.p.replication.mr,fig.p.reverse.mr,ncol=3,
                      common.legend = T)

ggsave(fig.total,device = 'png',height=19,width = 48,units = 'cm',dpi=300,
       filename = here::here('MR_meth_MDD/Figs/MR_res_pplot.png'))
fig.total